Contents

library(SingleCellExperiment)
library(scater)
library(scran)
library(data.table)
library(destiny)
library(tidyverse)
library(matrixcalc)
library(ComplexHeatmap)
library(visNetwork)
library(prettyR)
library(igraph)
library(destiny)
library(muvis)
library(limma)
library(biomaRt)
library(ggplot2)
ido <- read.table("/Users/elihei/Downloads/Ido_data/GSE72857_umitab.txt")
clusters <- read.csv("/Users/elihei/Downloads/Ido_data/MAP.csv", header = F)
rownames(clusters) <- clusters$V1
meta_data <- fread("/Users/elihei/Downloads/Ido_data/GSE72857_experimental_design.txt", skip = 19, header = T)
meta_data <- data.frame(meta_data)
rownames(meta_data) <- meta_data$Well_ID
rownames(ido) <- strsplit2(rownames(ido), ";")[,1]
htseq <- read.table("/Users/elihei/Downloads/nestorowa_corrected_log2_transformed_counts.txt")
htseq <- t(htseq)

cell_type <- read.table("../data/new_batch/all_cell_types.txt", header = T)
cell_type <- cell_type[colnames(htseq),]
cell_type <- cell_type[1:10]
is.one <- apply(cell_type, 1, function(x) names(which(x == 1)))
cell_types <- is.one %>% map(function(x) paste(x, collapse = " "))
cell_types <- unlist(cell_types)
cell_types[which(cell_types == "")] <- "LTHSC"
loop_types <- cell_types[which((cell_types %in% c("CMP_broad", "GMP_broad", "LMPP_broad")) | grepl("^MPP", cell_types))]
loop_types <- names(loop_types)
hvg <- hv.genes(ido, 500)
hvg <- rownames(ido)[hvg]
hvg_sym <- hvg[hvg %in% rownames(htseq)]
g <- ggm1(data = data.frame(t(ido[hvg_sym,])), rho = 0.25)
g$graph$network
t.matrix <- abs(g$wi)
t.matrix <- ifelse(t.matrix > 0.05, t.matrix, 0)
diag(t.matrix) <- 0
t.matrix <- data.frame(t.matrix)
outs <- which(colSums(t.matrix, na.rm = T) == 0)
t.matrix <- t.matrix[-outs, -outs]
hvg_sym <- hvg_sym[-outs]
t.matrix <- t(scale(t.matrix, center=FALSE, scale=colSums(t.matrix)))

1 Euclidean Metric on Whole Data

d.matrix <- dist(t(htseq))
write.csv(as.matrix(d.matrix), "../results/euc_htseq.csv")
d.matrix <- read.csv("../results/euc_htseq.csv", row.names = 1)


dm <- DiffusionMap(data = data.frame(sample = colnames(htseq), col = cell_types), distance = as.dist(d.matrix))
dpt <- DPT(dm)

df <- data.frame(DPT1 = dpt$DC1, DPT2 = dpt$DC2, col = dpt$col)
ggplot(df, aes(x = DPT1, y = DPT2, color = col)) + geom_point()

plot.DPT(dpt, dcs = c(1, 2))

df <- data.frame(DPT2 = dpt$DC2, DPT3 = dpt$DC3, col = dpt$col)
ggplot(df, aes(x = DPT2, y = DPT3, color = col)) + geom_point()

plot.DPT(dpt, dcs = c(2, 3))

df <- data.frame(DPT1 = dpt$DC1, DPT3 = dpt$DC3, col = dpt$col)
ggplot(df, aes(x = DPT1, y = DPT3, color = col)) + geom_point()

plot.DPT(dpt, dcs = c(1, 3))

2 GRN Metric on Whole Data

# d.matrix <- dist.matrix(htseq[hvg_sym, ], m = 1, k = 5)
# write.csv(as.matrix(d.matrix), "../results/gmm_htseq.csv")
d.matrix <- read.csv("../results/gmm_htseq.csv", row.names = 1)


dm <- DiffusionMap(data = data.frame(sample = colnames(htseq), col = cell_types), distance = as.dist(d.matrix))
dpt <- DPT(dm)

df <- data.frame(DPT1 = dpt$DC1, DPT2 = dpt$DC2, col = dpt$col)
ggplot(df, aes(x = DPT1, y = DPT2, color = col)) + geom_point()

plot.DPT(dpt, dcs = c(1, 2))

df <- data.frame(DPT2 = dpt$DC2, DPT3 = dpt$DC3, col = dpt$col)
ggplot(df, aes(x = DPT2, y = DPT3, color = col)) + geom_point()

plot.DPT(dpt, dcs = c(2, 3))

df <- data.frame(DPT1 = dpt$DC1, DPT3 = dpt$DC3, col = dpt$col)
ggplot(df, aes(x = DPT1, y = DPT3, color = col)) + geom_point()

plot.DPT(dpt, dcs = c(1, 3))

3 Euclidean Metric on Loop

# d.matrix <- dist(t(htseq[, loop_types]))
# write.csv(as.matrix(d.matrix), "../results/euc_htseq_loop.csv")
d.matrix <- read.csv("../results/euc_htseq_loop.csv", row.names = 1)
cell_types[which(grepl("^MPP", cell_types))] <- "MPP"

dm <- DiffusionMap(data = data.frame(sample = loop_types, col = cell_types[loop_types]), distance = as.dist(d.matrix))
dpt <- DPT(dm)

df <- data.frame(DPT1 = dpt$DC1, DPT2 = dpt$DC2, col = dpt$col)
ggplot(df, aes(x = DPT1, y = DPT2, color = col)) + geom_point()

plot.DPT(dpt, dcs = c(1, 2))

df <- data.frame(DPT2 = dpt$DC2, DPT3 = dpt$DC3, col = dpt$col)
ggplot(df, aes(x = DPT2, y = DPT3, color = col)) + geom_point()

plot.DPT(dpt, dcs = c(2, 3))

df <- data.frame(DPT1 = dpt$DC1, DPT3 = dpt$DC3, col = dpt$col)
ggplot(df, aes(x = DPT1, y = DPT3, color = col)) + geom_point()

plot.DPT(dpt, dcs = c(1, 3))

4 GRN Metric on Loop

# d.matrix <- dist.matrix(htseq[hvg_sym, loop_types], m = 1, k = 5)
# write.csv(as.matrix(d.matrix), "../results/ggm_htseq_loop.csv")
d.matrix <- read.csv("../results/ggm_htseq_loop.csv", row.names = 1)


outs <- which(d.matrix > 90, arr.ind = TRUE)
outs <- outs[,1]
outs <- table(outs)
outs <- which(outs > 100)
outs <- as.numeric(names(outs))

dm <- DiffusionMap(data = data.frame(sample = loop_types[-outs], col = cell_types[loop_types[-outs]]), distance = as.dist(d.matrix[-outs, -outs]))
dpt <- DPT(dm)

df <- data.frame(DPT1 = dpt$DC1, DPT2 = dpt$DC2, col = dpt$col)
ggplot(df, aes(x = DPT1, y = DPT2, color = col)) + geom_point()

plot.DPT(dpt, dcs = c(1, 2))

df <- data.frame(DPT2 = dpt$DC2, DPT3 = dpt$DC3, col = dpt$col)
ggplot(df, aes(x = DPT2, y = DPT3, color = col)) + geom_point()

plot.DPT(dpt, dcs = c(2, 3))

df <- data.frame(DPT1 = dpt$DC1, DPT3 = dpt$DC3, col = dpt$col)
ggplot(df, aes(x = DPT1, y = DPT3, color = col)) + geom_point()

plot.DPT(dpt, dcs = c(1, 3))